R Markdown

library(tidyverse)
## -- Attaching packages ------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ---------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(stringr)
library(tidyverse)
library(gganimate)
library(png)
# Silence all warnings for the rest of the session (warn = -1). Note that this
# also hides any coercion warnings raised later in the script.
options(warn = -1)

# Read the raw export, which has one column per year. "UTF-8-BOM" strips the
# byte-order mark at the start of the file; without it the first header is
# imported as "ï..Series.Name" instead of "Series.Name".
gdp <- read.csv("GDP_Data.csv", fileEncoding = "UTF-8-BOM")
str(gdp)
## 'data.frame':    269 obs. of  16 variables:
##  $ ï..Series.Name: chr  "GDP (current US$)" "GDP (current US$)" "GDP (current US$)" "GDP (current US$)" ...
##  $ Series.Code   : chr  "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" "NY.GDP.MKTP.CD" ...
##  $ Country.Name  : chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ Country.Code  : chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ X1990..YR1990.: chr  ".." "2028553750" "62045099642.7774" ".." ...
##  $ X2000..YR2000.: chr  ".." "3480355258.04122" "54790245600.5846" ".." ...
##  $ X2009..YR2009.: chr  "12439087076.7667" "12044208085.864" "137211039898.193" "678000000" ...
##  $ X2010..YR2010.: chr  "15856574731.4411" "11926957254.6288" "161207268655.392" "576000000" ...
##  $ X2011..YR2011.: chr  "17804292964.1045" "12890866742.6533" "200019057307.655" "574000000" ...
##  $ X2012..YR2012.: chr  "19907317065.6667" "12319784886.2038" "209058991952.125" "644000000" ...
##  $ X2013..YR2013.: chr  "20561069558.2152" "12776280961.155" "209755003250.664" "641000000" ...
##  $ X2014..YR2014.: chr  "20484885119.7348" "13228247844.1247" "213810022462.428" "643000000" ...
##  $ X2015..YR2015.: chr  "19907111418.9938" "11386931489.7968" "165979277276.907" "661000000" ...
##  $ X2016..YR2016.: chr  "19046357714.4928" "11883682170.8236" "160129866569.935" "653000000" ...
##  $ X2017..YR2017.: chr  "19543976895.4248" "13038538300.2644" "167555280113.181" "634000000" ...
##  $ X2018..YR2018.: chr  ".." ".." ".." ".." ...
# Preview the first rows of the raw import.
head(gdp)
##      ï..Series.Name    Series.Code   Country.Name Country.Code   X1990..YR1990.
## 1 GDP (current US$) NY.GDP.MKTP.CD    Afghanistan          AFG               ..
## 2 GDP (current US$) NY.GDP.MKTP.CD        Albania          ALB       2028553750
## 3 GDP (current US$) NY.GDP.MKTP.CD        Algeria          DZA 62045099642.7774
## 4 GDP (current US$) NY.GDP.MKTP.CD American Samoa          ASM               ..
## 5 GDP (current US$) NY.GDP.MKTP.CD        Andorra          AND 1029048481.88051
## 6 GDP (current US$) NY.GDP.MKTP.CD         Angola          AGO 11228764963.1618
##     X2000..YR2000.   X2009..YR2009.   X2010..YR2010.   X2011..YR2011.
## 1               .. 12439087076.7667 15856574731.4411 17804292964.1045
## 2 3480355258.04122  12044208085.864 11926957254.6288 12890866742.6533
## 3 54790245600.5846 137211039898.193 161207268655.392 200019057307.655
## 4               ..        678000000        576000000        574000000
## 5 1434429703.33518 3660530702.97305 3355695364.23841 3442062830.13622
## 6 9129594818.60749 70307163678.1895 83799496611.6049  111789686464.26
##     X2012..YR2012.   X2013..YR2013.   X2014..YR2014.   X2015..YR2015.
## 1 19907317065.6667 20561069558.2152 20484885119.7348 19907111418.9938
## 2 12319784886.2038  12776280961.155 13228247844.1247 11386931489.7968
## 3 209058991952.125 209755003250.664 213810022462.428 165979277276.907
## 4        644000000        641000000        643000000        661000000
## 5 3164615186.94591 3281585236.32501 3350736367.25488 2811489408.89431
## 6 128052853643.447 136709862831.308 145712200312.505 116193649124.475
##     X2016..YR2016.   X2017..YR2017. X2018..YR2018.
## 1 19046357714.4928 19543976895.4248             ..
## 2 11883682170.8236 13038538300.2644             ..
## 3 160129866569.935 167555280113.181             ..
## 4        653000000        634000000             ..
## 5 2877311946.90265 3012914131.16971             ..
## 6 101123851090.473  122123822333.73             ..
# Keep the country name/code and the yearly GDP columns (positions 3 to 15),
# dropping the series name/code and the final year column.
gdp <- gdp %>% select(3:15)

# Keep only the first 217 rows (per the original note, these are the country
# rows of the export).
gdp <- gdp[1:217, ]

gdp_tidy <- gdp %>%
  # Cast the year columns to numeric; non-numeric placeholders such as ".."
  # become NA.
  mutate_at(vars(contains("YR")), as.numeric) %>%
  # Wide to long: one row per country and year column.
  gather(year, value, 3:13) %>%
  # Normalise the column names to snake_case.
  janitor::clean_names() %>%
  # read.csv() renames the headers to e.g. "X1990..YR1990.", so taking the
  # first four characters gives "X199" and as.numeric() turns every year into
  # NA. Extract the first run of four digits instead.
  mutate(year = as.numeric(stringr::str_extract(year, "\\d{4}")))


# Per-column summary of the reshaped data (value ranges and NA counts).
summary(gdp_tidy)
##  country_name       country_code            year          value          
##  Length:2387        Length:2387        Min.   : NA    Min.   :8.824e+06  
##  Class :character   Class :character   1st Qu.: NA    1st Qu.:4.435e+09  
##  Mode  :character   Mode  :character   Median : NA    Median :2.020e+10  
##                                        Mean   :NaN    Mean   :3.241e+11  
##                                        3rd Qu.: NA    3rd Qu.:1.384e+11  
##                                        Max.   : NA    Max.   :1.939e+13  
##                                        NA's   :2387   NA's   :193
# Check the column types of the reshaped data.
str(gdp_tidy)
## 'data.frame':    2387 obs. of  4 variables:
##  $ country_name: chr  "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ country_code: chr  "AFG" "ALB" "DZA" "ASM" ...
##  $ year        : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ value       : num  NA 2.03e+09 6.20e+10 NA 1.03e+09 ...
# Preview the first rows of the reshaped data.
head(gdp_tidy)
##     country_name country_code year       value
## 1    Afghanistan          AFG   NA          NA
## 2        Albania          ALB   NA  2028553750
## 3        Algeria          DZA   NA 62045099643
## 4 American Samoa          ASM   NA          NA
## 5        Andorra          AND   NA  1029048482
## 6         Angola          AGO   NA 11228764963
# Load the tidy GDP table from disk; this overwrites any gdp_tidy object already
# in the session. Declaring the column types up front pins them instead of
# relying on readr's type guessing, and avoids the "Parsed with column
# specification" message.
gdp_tidy <- read_csv(
  "gdp_tidy.csv",
  col_types = cols(
    country_name = col_character(),
    country_code = col_character(),
    year = col_double(),
    value = col_double()
  )
)
## Parsed with column specification:
## cols(
##   country_name = col_character(),
##   country_code = col_character(),
##   year = col_double(),
##   value = col_double()
## )
# Per-column summary of the table read from gdp_tidy.csv.
summary(gdp_tidy)
##  country_name       country_code            year          value          
##  Length:2387        Length:2387        Min.   :1990   Min.   :8.824e+06  
##  Class :character   Class :character   1st Qu.:2009   1st Qu.:4.435e+09  
##  Mode  :character   Mode  :character   Median :2012   Median :2.020e+10  
##                                        Mean   :2010   Mean   :3.241e+11  
##                                        3rd Qu.:2015   3rd Qu.:1.384e+11  
##                                        Max.   :2017   Max.   :1.939e+13  
##                                                       NA's   :193
# Check the column types of the table read from gdp_tidy.csv.
str(gdp_tidy)
## tibble [2,387 x 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ country_name: chr [1:2387] "Afghanistan" "Albania" "Algeria" "American Samoa" ...
##  $ country_code: chr [1:2387] "AFG" "ALB" "DZA" "ASM" ...
##  $ year        : num [1:2387] 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
##  $ value       : num [1:2387] NA 2.03e+09 6.20e+10 NA 1.03e+09 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   country_name = col_character(),
##   ..   country_code = col_character(),
##   ..   year = col_double(),
##   ..   value = col_double()
##   .. )
# Preview the first rows of the table read from gdp_tidy.csv.
head(gdp_tidy)
## # A tibble: 6 x 4
##   country_name   country_code  year        value
##   <chr>          <chr>        <dbl>        <dbl>
## 1 Afghanistan    AFG           1990          NA 
## 2 Albania        ALB           1990  2028553750 
## 3 Algeria        DZA           1990 62045099643.
## 4 American Samoa ASM           1990          NA 
## 5 Andorra        AND           1990  1029048482.
## 6 Angola         AGO           1990 11228764963.
# Rank countries by GDP within each year and keep the ten largest per year.
gdp_formatted <- gdp_tidy %>%
  group_by(year) %>%
  mutate(
    # Rank 1 is the largest GDP of the year. ties.method = "first" keeps the
    # ranks distinct integers, so exactly one row per year has rank 1 and the
    # division below always has a single denominator.
    rank = rank(-value, ties.method = "first"),
    # GDP relative to the year's top-ranked country.
    Value_rel = value / value[rank == 1],
    # Bar label: GDP divided by 1e9 and rounded, prefixed with a space.
    Value_lbl = paste0(" ", round(value / 1e9))
  ) %>%
  ungroup() %>%
  # The condition is evaluated row by row, so no grouping is needed here.
  filter(rank <= 10)

# Static bar-chart layout that the animation is built on: one tile per country
# positioned at its rank, the country name next to the zero line, and the
# GDP label at the end of the bar.
staticplot <- ggplot(
  gdp_formatted,
  aes(
    rank,
    group = country_name,
    fill = as.factor(country_name),
    color = as.factor(country_name)
  )
) +
  # A tile centred at value / 2 with height = value spans from 0 to value.
  geom_tile(
    aes(y = value / 2, height = value, width = 0.9),
    alpha = 0.8, color = NA
  ) +
  geom_text(
    aes(y = 0, label = paste(country_name, " ")),
    vjust = 0.2, hjust = 1
  ) +
  geom_text(aes(y = value, label = Value_lbl, hjust = 0)) +
  # Flip so the bars run horizontally; clip = "off" lets labels be drawn
  # outside the panel area.
  coord_flip(clip = "off", expand = FALSE) +
  scale_y_continuous(labels = scales::comma) +
  # Reverse the rank axis so that rank 1 is drawn at the top.
  scale_x_reverse() +
  # "none" is the supported way to drop a guide; passing FALSE is deprecated
  # in newer ggplot2 releases.
  guides(color = "none", fill = "none") +
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Thin grey grid lines are set for the x direction only.
    panel.grid.major.x = element_line(size = .1, color = "grey"),
    panel.grid.minor.x = element_line(size = .1, color = "grey"),
    plot.title = element_text(
      size = 25, hjust = 0.5, face = "bold", colour = "grey", vjust = -1
    ),
    plot.subtitle = element_text(
      size = 18, hjust = 0.5, face = "italic", color = "grey"
    ),
    plot.caption = element_text(
      size = 8, hjust = 0.5, face = "italic", color = "grey"
    ),
    plot.background = element_blank(),
    # Margins in cm: top, right, bottom, left.
    plot.margin = margin(2, 2, 2, 4, "cm")
  )


# Animate across years: each state is one value of `year`. view_follow() lets
# the view track the data while keeping the x (rank) range fixed.
anim <- staticplot +
  transition_states(
    year,
    transition_length = 4,
    state_length = 1
  ) +
  view_follow(fixed_x = TRUE) +
  labs(
    title = 'GDP per Year : {closest_state}',
    subtitle = "Top 10 Countries",
    caption = "GDP in Billions USD | Data Source: World Bank Data"
  )

# Render the animation to a GIF file: 200 frames at 20 frames per second,
# 1200 x 1000 output.
animate(
  anim,
  nframes = 200,
  fps = 20,
  width = 1200,
  height = 1000,
  renderer = gifski_renderer("gganim.gif")
)

### Gráficos de barras animados en R

El objetivo de esta publicación es explicar cómo construir gráficos animados de barras usando R.

Paquetes

Los paquetes necesarios para construir gráficos animados en R son:

  • ggplot2
  • gganimate

Si bien los anteriores son los paquetes esenciales, también hemos utilizado tidyverse, janitor y scales en este proyecto para la manipulación, limpieza y formateo de datos.

Datos

El conjunto de datos original utilizado en este proyecto se descargó de World Bank Data. El mismo archivo CSV se encuentra en la carpeta del proyecto.

Sobre los datos:

Estos datos contienen el valor del PIB de la mayoría de los países durante varios años (especialmente entre 2000 y 2017).

Preprocesamiento de datos:

Usaremos el siguiente código para preparar nuestros datos en el formato deseado. En concreto, limpiamos los nombres de las columnas, convertimos los valores a tipo numérico y pasamos los datos de formato ancho a formato largo con la función gather() de tidyr. Los datos ordenados se guardan en un nuevo archivo CSV, gdp_tidy.csv, para su uso posterior.